*! version 5.0
* 13 August 2018
* NIDS

* THIS IS A FOOD AND NON-FOOD EXPENDITURE DO FILE: 9 OF 14

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "W1 Food_NonFood Expenditure - Master  Food_NonFood Expenditure do file  (1 of 14).do"

*=====================================================================================================================================
* SETTING UP STATA TO RUN DO FILES

* Start from a clean session: drop any data and matrices held in memory and
* turn off the --more-- pause so the do-file runs without user input.
clear
cap clear matrix
set more off 

* Load the working dataset for this step. $DataOUT is a global macro defined
* in the master do-file (1 of 14); tempdata7.dta is presumably written by the
* preceding do-file in the sequence -- confirm against do-file 8 of 14.
* A forward slash is used as the directory separator because Stata accepts it
* on Windows, macOS and Unix, whereas a backslash only works on Windows.
use "$DataOUT/tempdata7.dta", clear

**********************************************************************
*					Non-Food 3
***			Aggregate Imputations and Overview
**********************************************************************

* Counts, for each household, the number of non-food items that were imputed
* (row sum of the e#imputed dummy variables).
egen nfoodsubimp = rowtotal(e*imputed)

**********************************************************************
***			Subaggregates (using Regression Imputation)
**********************************************************************

* Each sub-aggregate is the row sum of the regression-imputed values for a
* consecutive run of non-food item numbers.
*
* The item variables are listed explicitly (e3impute e4impute ... e13impute)
* instead of through a "first-last" varlist range. A range is expanded by the
* order in which variables are stored in the dataset, so it would silently
* include any other variable that happens to sit between the two end points.
* When the items are stored contiguously the results are identical.
*
* Specification format: <new variable> <first item number> <last item number>
local subaggspec               ///
    viceexp        1   2   ///
    leisureexp     3  13   ///
    transportexp  14  16   ///
    utilitiesexp  17  21   ///
    insuranceexp  22  25   ///
    householdexp  26  31   ///
    clothesexp    32  34   ///
    healthexp     35  40   ///
    educationexp  41  44   ///
    miscexp       45  52

while "`subaggspec'" != "" {
    * Peel off one (name, first, last) triple from the front of the list
    gettoken aggname   subaggspec : subaggspec
    gettoken firstitem subaggspec : subaggspec
    gettoken lastitem  subaggspec : subaggspec

    * Build the explicit list of item variables for this sub-aggregate
    local itemvars
    forvalues i = `firstitem'/`lastitem' {
        local itemvars `itemvars' e`i'impute
    }

    egen `aggname' = rowtotal(`itemvars')
}

**********************************************************************
***			Imputing Missing Aggregates
*				Regression Method
**********************************************************************

* Household total of non-food consumption, summed over the regression-imputed
* item values (all variables matching e*impute).
egen nfooda = rowtotal(e*impute)

* rowtotal() counts missing components as zero, so a household total of zero
* is recoded to missing before the log is taken.
replace nfooda = . if nfooda == 0

generate lgnfooda = log(nfooda)

* Explanatory variables shared by the regression and by the imputation, held
* in one place so the two specifications cannot drift apart.
local nfoodrhs lgincome w1_h_dwlrms westerncape easterncape northerncape freestate kwazulunatal northwest gauteng ///
    mpumalanga urban hhsizer maxage fammatric Asian White Coloured grants anychildren

* Regression shown for reference; impute fits its own model internally.
regress lgnfooda `nfoodrhs'

* Fill missing log totals from the regressors; observed values are carried
* through unchanged into lgnfood.
impute lgnfooda `nfoodrhs', gen(lgnfood)

* Back-transform from logs to levels
generate nfood = exp(lgnfood)

**********************************************************************
***				Imputing Missing Aggregates
*					Median Measure
**********************************************************************

* Household total over the cell-median-imputed item values; as above, a zero
* row total is treated as missing.
egen medianimptotal = rowtotal(median*imp)
replace medianimptotal = . if medianimptotal == 0

* Per cluster: median of the household totals and the number of households
* with a non-missing total.
egen psumedianimp = median(medianimptotal), by(w1_cluster)
egen npsucount    = count(medianimptotal),  by(w1_cluster)

* First pass: use the cluster median, but only where more than two
* households in the cluster have a non-missing total.
replace medianimptotal = psumedianimp if npsucount > 2 & medianimptotal == .

* Second pass: any household still missing gets the province median, which
* is computed after the cluster-level fill above.
egen provmedianmedianimp = median(medianimptotal), by(province)
replace medianimptotal = provmedianmedianimp if medianimptotal == .

*********************************************************************
***				Imputation Rates
**********************************************************************

* nfoodimputed = 1 when the household has no raw aggregate (nfooda missing)
* but does have a regression-imputed one (nfood non-missing), 0 otherwise.
generate nfoodimputed = (nfood != . & nfooda == .)

* nfoodimputedpartial = 1 when the aggregate itself was imputed or at least
* one component item was imputed; 0 when neither applies.
generate nfoodimputedpartial = 1 if nfoodsubimp > 0 | nfoodimputed == 1
replace  nfoodimputedpartial = 0 if nfoodsubimp == 0 & nfoodimputed == 0

* Imputation rates (means of the two flags) and count of fully imputed
* households.
summarize nfoodimputed nfoodimputedpartial
count if nfoodimputed == 1

**********************************************************************
***				Comparisons
**********************************************************************

* Distribution of the median-imputed, regression-imputed and raw aggregates
* over all households ...
summarize medianimptotal nfood nfoodtot, detail

* ... and then restricted to households flagged as fully imputed and as
* partially imputed, in that order.
foreach flag in nfoodimputed nfoodimputedpartial {
    summarize medianimptotal nfood nfoodtot if `flag' == 1, detail
}

* Temporary log versions, used only by the optional graphs below and dropped
* again at the end of this section.
generate lgmedtotalimps = log(medianimptotal) if nfoodimputedpartial == 1
generate lgregtotalimps = log(nfood)          if nfoodimputedpartial == 1
generate lgrawtotal     = log(nfoodtot)       if nfoodimputedpartial == 0

* Optional diagnostic graphs (left commented out):
*scatter medianimptotal nfood if nfoodimputedpartial==1
*scatter lgmedtotalimps lgregtotalimps 
*twoway (kdensity lgrawtotal ) (kdensity lgmedtotalimps ) (kdensity lgregtotalimps), legend(order(1 "Raw aggregate hh not partial 
*imputed" 2 "Median imputation hh partial imputed" 3 "Regression imputation hh partial imputed"))
drop lgmedtotalimps lgregtotalimps lgrawtotal

**********************************************************************
***				Cleaning Up, Dropping, Labelling
**********************************************************************

** Drops the working variables used for the cell mean and regression imputations.
** All patterns below are wildcards matched against whatever is in the dataset
** at this point; most of the matched variables are created in earlier do-files.
** NOTE(review): `pr*` matches every variable whose name begins with "pr". That
** includes `provmedianmedianimp` created above, but also `province`, which is
** used as a by-group earlier in this file -- confirm that removing `province`
** from the saved dataset is intended.
drop npsumedian*  psu*count e2_2_*lg nprovmedian* pr* e*imputelg psumedianimp npsucount e*median
drop  ndismedian* dis*count n*counter disnsize* disnrate* psunsize* psunrate*

* Drops the variables associated with the rates of imputation
drop  impd* imppercc* e*imputeS nsmall* 

* Drops the variables used to analyze the imputed variables alone
drop  nfmedian*meds nfimp*imps  ntotalmeds ntotalimps

** Rename and label the three per-item series for each of the 52 non-food
** items: the regression-imputed value, the imputation dummy and the
** cell-median-imputed value.
forvalues item = 1/52 {
    rename e`item'impute nfreg`item'
    label variable nfreg`item' "Consumption of non-food item `item' using Regression Imputation"

    rename e`item'imputed nf`item'imputed
    label variable nf`item'imputed "Dummy Variable for Non-Food Item `item' being an imputed value"

    rename median`item'imp nfmed`item'
    label variable nfmed`item' "Consumption of non-food item `item' using Cell Median Imputation"
}

* Label corrected: this file handles non-food items and the variable name is
* countNoNFood, so "Food Items" looked like a copy-paste slip from the food
* do-file. NOTE(review): confirm countNoNFood is indeed the non-food count.
label var countNoNFood "Number of Non-Food Items Consumed by this Household"
rename nfoodsubimp nfooditemimps
label var nfooditemimps "Number of Non-Food Items Imputed for this Household"

* Final names for the two imputed aggregates
rename nfood nfregagg
label var nfregagg "Aggregate Non-Food Consumption using Regression Imputation"

rename medianimptotal nfmedagg
label var nfmedagg "Aggregate Non-Food Consumption using Cell Median Imputation"

*Drops the aggregates used before aggregate imputation
drop nfooda lgnfood lgnfooda

drop tax

*Labelling variables
label var nfoodtot "Aggregate Non-Food Consumption Raw (no imputations)"
label var viceexp "Aggregate Expenditure on Cigarettes and Alcohol with Imputations"
label var leisureexp "Aggregate Expenditure on Leisure and Entertainment with  Imputations"
label var transportexp "Aggregate Expenditure on private Transport with  Imputations"
label var utilitiesexp "Aggregate Expenditure on Utilities and Municipal Rates with Imputations"
label var insuranceexp "Aggregate Expenditure on Insurance with  Imputations"
label var householdexp "Aggregate Expenditure on Household goods and durables with  Imputations"
label var clothesexp "Aggregate Expenditure on Clothing with  Imputations"
label var healthexp "Aggregate Expenditure on Health Care with  Imputations"
label var educationexp "Aggregate Expenditure on Education with  Imputations"
label var miscexp "Aggregate Expenditure on Miscellaneous/Expenditure with  Imputations"

***Overall Imputation Rates
label var nfoodimputed "Dummy Variable for Aggregate Non-Food value being totally imputed"
label var nfoodimputedpartial "Dummy Variable for Non-Food aggregate having any of its components imputed"

*------------------------------------------------------------------------------------------------------------------

* Write the dataset for the next do-file in the sequence, overwriting any
* existing copy. A forward slash is used as the directory separator because
* Stata accepts it on every platform, whereas a backslash is Windows-only.
save "$DataOUT/tempdata8.dta", replace

